home *** CD-ROM | disk | FTP | other *** search
Text File | 1988-12-23 | 13.8 KB | 349 lines | [TEXT/EDIT] |
- ; Hash tables for MacScheme.
- ;
- ; (sxhash x)
- ; returns a fixnum hash code for x with the property that
- ; (equal? x y) implies (= (sxhash x) (sxhash y)).
- ;
- ; (make-eq?-hashtable)
- ; (make-eqv?-hashtable)
- ; (make-equal?-hashtable)
- ;
- ; These three procedures return a hash table h with the
- ; following operations:
- ;
- ; (h 'population)
- ; returns the number of items in the hash table.
- ; ((h 'rehash) newsize)
- ; rehashes the table to a size that can accomodate at least
- ; newsize items. Rehashing occurs automatically as needed.
- ; ((h 'lookup) x)
- ; returns the entry for x, or #f if there is none.
- ; ((h 'change) x entry)
- ; changes the entry for x to be entry, signalling an error
- ; if x is not already in the table.
- ; ((h 'add) x entry)
- ; changes the entry for x to be entry, creating a new item
- ; if x is not already in the table.
- ; ((h 'remove) x)
- ; removes x from the table.
- ; (h 'clear)
- ; clears all items from the table.
- ; ((h 'for-each) f)
- ; calls f once for every item in the table. The two arguments
- ; to f are the key and the entry associated with that key.
- ;
- ; The three kinds of hash table objects correspond roughly to
- ; association lists searched using assq, assv, and assoc.
-
- ; These hash tables are not blindingly fast. Association lists
- ; are faster for tables with fewer than 100 or so entries.
-
- ; Implementation note. Hash-on-eq? is implemented by using the
- ; pointer part of a object as the hash code. Since MacScheme
- ; uses a relocating garbage collector, the hash table must be
- ; rehashed following a garbage collection. This implementation
- ; delays the rehashing as long as possible instead of rehashing
- ; after every garbage collection.
- ;
- ; Most operations must check that the garbage collector has not
- ; run since the table was last rehashed, and the garbage collector
- ; must not run between this check and the end of the operation.
- ; This means that the critical part of each operation cannot
- ; allocate any storage. Such code cannot be written without
- ; detailed knowledge of the compiler and run-time system. For
- ; example, MacScheme's interrupt system allocates storage so
- ; most hash table operations have to run with interrupts disabled.
-
- ; This definition of call-without-interrupts was extracted from
- ; Library:Basic:general.sch.
-
- (define (call-without-interrupts thunk)
- (let ((mask (syscall (vector -23 1)))
- (ans 0))
- (syscall (vector -23 (logior mask 1)))
- (set! ans (thunk))
- (syscall (vector -23 mask))
- ans))
-
- (define (make-eq?-hashtable)
- (let ((tablesize 64)
- (unused (list #f))
- (deleted (list #f)))
- (let ((keys (make-vector tablesize unused))
- (entries (make-vector tablesize #f))
- (population 0)
- (gcinfo (vector -1 0 0 0))
- (gccount 0))
- (define (rehash-if-necessary)
- (if (not (valid?))
- (rehash tablesize)))
- (define (valid?)
- (syscall gcinfo)
- (= (vector-ref gcinfo 1) gccount))
- (define (hash x)
- (remainder (+ (typetag x) (typetag-set! x 0)) tablesize))
- (define (search x i)
- (let ((y (vector-ref keys i)))
- (cond ((eq? y x) i)
- ((eq? y unused) #f)
- (else (searchloop x (+ i 1) i)))))
- (define (searchloop x i stop)
- (cond ((= i tablesize) (searchloop x 0 stop))
- ((= i stop) #f)
- (else (let ((y (vector-ref keys i)))
- (cond ((eq? y x) i)
- ((eq? y unused) #f)
- (else (searchloop x (+ i 1) stop)))))))
- (define (nextavail i)
- (let ((y (vector-ref keys i)))
- (cond ((eq? y unused) i)
- ((eq? y deleted) i)
- (else (nextavailloop (+ i 1) i)))))
- (define (nextavailloop i stop)
- (cond ((= i tablesize) (nextavailloop 0 stop))
- ((= i stop) ???)
- (else (let ((y (vector-ref keys i)))
- (cond ((eq? y unused) i)
- ((eq? y deleted) i)
- (else (nextavailloop (+ i 1) stop)))))))
- (define (rehash newsize)
- (call-without-interrupts
- (lambda ()
- (if (< newsize population)
- (rehash population)
- (let ((oldkeys keys)
- (oldentries entries)
- (oldsize tablesize))
- (set! keys (make-vector newsize unused))
- (set! entries (make-vector newsize #f))
- (set! tablesize newsize)
- (syscall gcinfo)
- (set! gccount (vector-ref gcinfo 1))
- (do ((i (- oldsize 1) (- i 1)))
- ((< i 0) #t)
- (let ((x (vector-ref oldkeys i)))
- (cond ((eq? x unused) #f)
- ((eq? x deleted) #f)
- (else (let ((j (nextavail (hash x))))
- (vector-set! keys j x)
- (vector-set! entries
- j
- (vector-ref oldentries i))))))))))))
- (define (lookup x)
- (call-without-interrupts
- (lambda ()
- (rehash-if-necessary)
- (let ((i (search x (hash x))))
- (if i
- (vector-ref entries i)
- #f)))))
- (define (change x entry)
- (call-without-interrupts
- (lambda ()
- (rehash-if-necessary)
- (let ((i (search x (hash x))))
- (if i
- (begin (vector-set! entries i entry) #t)
- (error "Hashtable entry not found" x))))))
- (define (add x entry)
- (call-without-interrupts
- (lambda ()
- (rehash-if-necessary)
- (let ((i (search x (hash x))))
- (if i
- (begin (vector-set! entries i entry) #t)
- (begin
- (if (>= (quotient (* 10 population) tablesize) 9)
- (rehash (* 2 tablesize)))
- (let ((i (nextavail (hash x))))
- (vector-set! keys i x)
- (vector-set! entries i entry)
- (set! population (+ population 1))
- #t)))))))
- (define (rem x)
- (call-without-interrupts
- (lambda ()
- (rehash-if-necessary)
- (let ((i (search x (hash x))))
- (if i
- (begin (vector-set! keys i deleted)
- (vector-set! entries i #f)
- (set! population (- population 1))
- #t)
- #f)))))
- (define (clear)
- (call-without-interrupts
- (lambda ()
- (do ((i (- tablesize 1) (- i 1)))
- ((< i 0) (set! population 0) #t)
- (vector-set! keys i unused)
- (vector-set! entries i #f)))))
- (define (foreach f)
- (apply for-each
- (cons f (call-without-interrupts keys&entries))))
- (define (keys&entries)
- (keys&entries-loop (- tablesize 1) '() '()))
- (define (keys&entries-loop i l1 l2)
- (if (< i 0)
- (list l1 l2)
- (let ((y (vector-ref keys i)))
- (cond ((eq? y unused) (keys&entries-loop (- i 1) l1 l2))
- ((eq? y deleted) (keys&entries-loop (- i 1) l1 l2))
- (else (keys&entries-loop (- i 1)
- (cons y l1)
- (cons (vector-ref entries i) l2)))))))
- (syscall gcinfo)
- (set! gccount (vector-ref gcinfo 1))
- (%object self
- ((population) population)
- ((rehash newsize) (rehash newsize))
- ((lookup x) (lookup x))
- ((change x entry) (change x entry))
- ((add x entry) (add x entry))
- ((remove x) (rem x))
- ((clear) (clear))
- ((for-each f) (foreach f))))))
-
- ; hash-on-eqv? tables are implemented using a hash-on-equal? table
- ; for numbers and a hash-on-eq? table for everything else.
-
- (define (make-eqv?-hashtable)
- (let ((h1 (make-eq?-hashtable))
- (h2 (make-equal?-hashtable)))
- (%object self
- ((population)
- (+ (h1 'population) (h2 'population)))
- ((rehash newsize)
- ((h1 'rehash) newsize) ((h2 'rehash) newsize))
- ((lookup x)
- (((if (number? x) h2 h1) 'lookup) x))
- ((change x entry)
- (((if (number? x) h2 h1) 'change) x entry))
- ((add x entry)
- (((if (number? x) h2 h1) 'add) x entry))
- ((remove x)
- (((if (number? x) h2 h1) 'remove) x))
- ((clear)
- (h1 'clear) (h2 'clear))
- ((for-each f)
- ((h1 'for-each) f)
- ((h2 'for-each) f)))))
-
- ; A hash-on-equal? table is implemented using sxhash and a hash-on-eq?
- ; table whose entries are association lists.
-
- (define (make-equal?-hashtable)
- (let ((h (make-eq?-hashtable))
- (population 0))
- (define (change-error x)
- (error "Hashtable entry not found" x))
- (define (lookup x)
- (let ((item (assoc x ((h 'lookup) (sxhash x)))))
- (if item
- (cdr item)
- #f)))
- (define (change x entry)
- (call-without-interrupts
- (lambda ()
- (let ((bucket ((h 'lookup) (sxhash x))))
- (if bucket
- (let ((item (assoc x bucket)))
- (if item
- (begin (set-cdr! item entry) #t)
- (change-error x)))
- (change-error x))))))
- (define (add x entry)
- (call-without-interrupts
- (lambda ()
- (let ((k (sxhash x)))
- (let ((bucket ((h 'lookup) k)))
- (if bucket
- (let ((item (assoc x bucket)))
- (if item
- (begin (set-cdr! item entry) #t)
- (begin ((h 'change) k (cons (cons x entry) bucket))
- (set! population (+ population 1))
- #t)))
- ((h 'add) k (list (cons x entry)))))))))
- (define (rem x)
- (call-without-interrupts
- (lambda ()
- (let ((k (sxhash x)))
- (let ((bucket ((h 'lookup) k)))
- (if bucket
- (let ((item (assoc x bucket)))
- (if item
- (begin ((h 'change) k (remove item bucket))
- (set! population (- population 1))
- #t)
- #f))
- #f))))))
- (define (foreach f)
- ((h 'for-each)
- (lambda (ignore bucket)
- (for-each f (map car bucket) (map cdr bucket)))))
- (%object self
- ((population) population)
- ((rehash newsize) ((h 'rehash) newsize))
- ((lookup x) (lookup x))
- ((change x entry) (change x entry))
- ((add x entry) (add x entry))
- ((remove x) (rem x))
- ((clear) (h 'clear) (set! population 0) #t)
- ((for-each f) (foreach f)))))
-
- (define sxhash
- (letrec ((sxhash
- (lambda (x n)
- (let ((tt (typetag x)))
- (cond ((<= n 0) (lsh tt 14))
- ((fixnum? x) (logand mask (+ mask x)))
- ((pair? x)
- (logand mask
- (+ tt
- (remainder (* (sxhash (car x) (- n 1))
- (- (sxhash (cdr x) (- n 1)) 1))
- mask))))
- ((symbol? x)
- (logand mask (+ 10000 (sxhash (symbol->string x) n))))
- ((procedure? x)
- (logand mask (+ 10001 (sxhash (->pair x) n))))
- ((vector? x)
- (let ((m (vector-length x)))
- (logand mask
- (+ tt
- m
- (if (> m 0)
- (+ (sxhash (vector-ref x 0) (- n 1))
- (sxhash (vector-ref x (quotient m 2)) (- n 1))
- (sxhash (vector-ref x (- m 1)) (- n 1)))
- 0)))))
- ((bytevector? x)
- (let ((m (bytevector-length x)))
- (do ((h (+ tt m) (logand mask (+ h (lsh (bytevector-ref x i) j))))
- (i (min 16 (- m 1)) (- i 1))
- (j 0 (+ j 1)))
- ((< i 0) h))))
- ((string? x)
- (let ((m (string-length x)))
- (logand mask
- (+ tt
- m
- (if (> m 0) (char->integer (string-ref x 0)) -3253)
- (if (> m 1) (lsh (char->integer (string-ref x 1)) 8) 333)
- (if (> m 2) (lsh (char->integer (string-ref x 2)) 16) 135079)
- (if (> m 3) (char->integer (string-ref x 3)) -28301)
- (if (> m 4) (lsh (char->integer (string-ref x 4)) 8) 947)))))
- ((< tt #x40)
- (+ (lsh tt 17) (logand mask (typetag-set! x 0))))
- ((< tt #x50)
- (+ (- tt #x50) (sxhash (typetag-set! x #x40) n)))
- ((< tt #x60)
- (+ (- tt #x60) (sxhash (typetag-set! x #x50) n)))
- ((< tt #x70)
- (+ (- tt #x70) (sxhash (typetag-set! x #x60) n)))
- (else (+ 1000000 tt))))))
- (mask (- (expt 2 24) 1)))
- (lambda (x) (sxhash x 5))))
-
-